Merge Desjardins and Ashton trees

Author

Claudia Zirión-Martínez

Published

February 10, 2025

Setup

Libraries

Code
library(RRphylo)
library(manipulate)
library(ape)
library(phytools)
library(ggtree)
library(tidyverse)
library(RColorBrewer)
library(ggnewscale)
library(patchwork)
source("scripts/metadata_colors.R")

Paths

Code
metadata_ashton_desj_all_weavepop_H99 <- 
    "data/processed/metadata_ashton_desj_all_weavepop_final_H99.csv"
desj_tree_path <- 
    "data/raw/CryptoDiversity_Desjardins_Tree.tre"
desj_tree_out_path <- 
    "data/processed/tree_desjardins.newick"
desj_tree_out_plot <- 
    "results/trees/tree_desjardins.png"
desj_tree_out_plot_pdf <- 
    "results/trees/tree_desjardins.pdf"

ashton_tree_path <- 
    "data/raw/2017.06.09.all_ours_and_desj.snp_sites.mod.fa.cln.tree"
ashton_metadata_path <- 
    "../Crypto_Ashton/config/metadata_all_ashton_and_vni_desj.csv"
ashton_tree_out_path <- 
    "data/processed/tree_ashton.newick"
ashton_tree_unrooted_plot <- 
    "results/trees/tree_ashton_unrooted.png"
ashton_tree_rooted_plot_pdf <- 
    "results/trees/tree_ashton_rect.pdf"
ashton_tree_out_plot <- 
    "results/trees/tree_ashton.png"
ashton_tree_out_plot_pdf <- 
    "results/trees/tree_ashton.pdf"

merged_tree_out_path <- 
    "data/processed/tree_merged.newick"
merged_tree_branchlengths_plot <- 
    "results/trees/tree_merged_branchlengths.png"
merged_tree_plot <- 
    "results/trees/tree_merged.png"
merged_tree_plot_pdf <- 
    "results/trees/tree_merged.pdf"
merged_tree_small_plot <- 
    "results/trees/tree_merged_small.png"

Metadata

Use the metadata table that has all (even the ones that will be excluded) the samples of Ashton and Desjardins and H99 (n = 1064).

Code
metadata <- read.delim(
    metadata_ashton_desj_all_weavepop_H99,
    header=TRUE,
    sep=",") %>%
    select(strain, everything())

metadata$mating_type <-ifelse(metadata$mating_type == "", NA, metadata$mating_type)
summary <- metadata %>% 
    group_by(dataset, lineage) %>% 
    summarize(count = n())
summary
dataset lineage count
Ashton VNI 646
Desjardins VNBI 120
Desjardins VNBII 62
Desjardins VNI 181
Desjardins VNII 16
Reference VNI 1

Make separate dataframes for each metadata field.

Code
metadata$vni_subdivision <- factor(metadata$vni_subdivision,
                            levels = c(names(sublineage_colors), "VNIa-outlier"))
metadata$country_of_origin <- factor(metadata$country_of_origin,
                                levels = names(country_colors))

sublineage <- metadata %>%
                filter(lineage == "VNI")%>%
                select(strain, vni_subdivision)%>%
                column_to_rownames("strain")%>%
                droplevels()
lineage <- metadata %>%
            select(strain, lineage)%>%
            column_to_rownames("strain")
dataset <- metadata %>%
            select(strain, dataset)%>%
            column_to_rownames("strain")
source <- metadata %>%
            select(strain, source)%>%
            column_to_rownames("strain")
country <- metadata %>%
            select(strain, country_of_origin)%>%
            column_to_rownames("strain")
mating_type <- metadata %>%
            select(strain, mating_type)%>%
            column_to_rownames("strain")   
ploidy <- metadata %>%
    select(strain, ploidy)%>%
    column_to_rownames("strain")
quality <- metadata %>%
    select(strain, quality_warning)%>%
    column_to_rownames("strain")

Desjardins tree

Import the raw Desjardins tree

Code
desj_tree <- read.tree(desj_tree_path)

Reroot the tree at the middle of the branch leading to VNII

Code
VNII_root <- getMRCA(desj_tree, c("C2","C12"))
edge_length <- subset(desj_tree$edge.length, desj_tree$edge[,2] == VNII_root)
desj_tree <- reroot(desj_tree, VNII_root, edge_length/2)

Write rooted Desjardins tree

Code
write.tree(desj_tree, file = desj_tree_out_path)

Keep only the names of the countries in the Desjardins dataset to have a proper legend.

Code
country_desj <- levels(droplevels(country[rownames(country) %in% desj_tree$tip.label, ]))

Get lineage nodes for plot

Code
VNI_node <- getMRCA(desj_tree, c("Tu241-1","Bt164"))
VNII_node <- getMRCA(desj_tree, c("C2","C12"))
VNBI_node <- getMRCA(desj_tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(desj_tree, c("MW-RSA3321","MW-RSA3179"))

nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)
Code
VNIa4_node <- getMRCA(desj_tree, c("Ug2463","Bt164"))
VNIa5_node <- getMRCA(desj_tree, c("NRHc5025.ENR.CLIN.1","AD1-83a"))
VNIa93_node <- getMRCA(desj_tree, c("RTC1","Br795"))
VNIa32_node <- getMRCA(desj_tree, c("A4-34-6","In2632"))
VNIaX_node <- getMRCA(desj_tree, c("Bt48","MW-RSA36"))
VNIaY_node <- getMRCA(desj_tree, c("Bt18","Bt138"))
VNIb_node <- getMRCA(desj_tree, c("A1-84-14","MW-RSA722"))
VNIc_node <- getMRCA(desj_tree, c("Bt20","Bt11"))

nodes_vnisublineages <- data.frame(
    sublineage = c(
                "VNIa-4", "VNIa-5", "VNIa-93",
                "VNIa-32", "VNIa-X", "VNIa-Y",
                "VNIb", "VNIc"),
    mrca = c(
            VNIa4_node, VNIa5_node, VNIa93_node,
            VNIa32_node, VNIaX_node, VNIaY_node,
            VNIb_node, VNIc_node))
Code
d <- ggtree(desj_tree, 
        ladderize = TRUE,
        layout = "circular", 
        size = 0.09) %<+%  metadata +
    geom_tiplab(color = "black", size = 0.7, align = TRUE, linesize = 0.05)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 0.3, vjust = -0.5)+
    geom_hilight(data=nodes_vnisublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.8)+
        scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)+
    geom_tippoint(aes(shape = mating_type, color = source),
                size = 0.5)+
    scale_color_manual(name = "Source", values = source_colors, na.value = "gray90")+
    scale_shape_manual(name = "MAT", values = mat_shapes, na.value = 18)+
    guides(shape = guide_legend(order = 1), #override.aes = list(linewidth = 0.01),
            color = guide_legend(override.aes = list(size = 5), order = 2))+
    geom_treescale(x=-0.03, y=0, width=0.01, fontsize =2, offset=30)


dc <- gheatmap(d, country, width =.08, colnames=FALSE, offset=0.07)+
        scale_fill_manual(values=country_colors, 
                      name="Country", na.translate=TRUE,
                      limits = country_desj)+ 
        guides(fill = guide_legend(order = 3, ncol = 2))+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            fontsize = 3,
            align = TRUE, face = "bold",
            angle = "auto", offset = 0.11)+
    theme(legend.position = "bottom",
            legend.direction = "vertical")

dc

Save Desjardins tree plot to file.

Code
ggsave(desj_tree_out_plot, dc, height = 6.5, width = 7, units = "in", dpi = 600)
Code
ggsave(desj_tree_out_plot_pdf, dc, height = 6.5, width = 7, units = "in", dpi = 600)

Ashton tree

Import the raw Ashton tree

Code
ashton_tree_unrooted <- read.tree(ashton_tree_path)

Rename tips of the Desjardins samples because they have SRA run accessions in this tree but strain names in the other one

Code
ashton_tree_unrooted$tip.label <- sapply(ashton_tree_unrooted$tip.label, function(x) {
    if (x %in% metadata$run) {
        metadata$strain[metadata$run == x]
    } else {
        x
    }
})

Get the samples that are present in the tree but absent from the metadata of the full dataset

Code
tips_missing_from_final_dataset <- setdiff(ashton_tree_unrooted$tip.label, metadata$strain)

Compare the list of strains missing from metadata with the oringinal Ashton metadata

Code
ashton_metadata <-read.delim(
    ashton_metadata_path,
    header=TRUE, sep=",")
samples_missing_from_dataset <- ashton_metadata %>%
    filter(strain %in% tips_missing_from_final_dataset)%>%
    select(sample, strain, lineage, vni_subdivision)
samples_missing_from_dataset
sample strain lineage vni_subdivision
ERS1142710 20949_2#5 VNI VNIa
ERS542586 14893_3#52 VNI VNIa
ERS542587 14893_3#53 VNI VNIa
ERS542595 15277_3#45 VNI VNIa
ERS1142845 20427_3#51 VNI VNIa
ERS542490 14893_1#4 VNI VNIa
ERS542498 14893_1#12 VNI VNIa
ERS542584 15277_3#42 VNI VNIa
ERS542502 14893_1#16 VNI VNIa
SRS417657 SRR836978 VNI VNIa
SRS409113 SRR642863 VNI VNIa
SRS404447 SRR797736 VNI VNIa
ERS2541388 CNS_1403 VNI VNIa
CNS_1465 VNI VNIa
ERS1142865 20949_2#40 VNI VNIa
ERS2541312 04CN-32-011 VNI VNIa
ERS2541291 04CN-32-083 VNI VNIa
ERS2541044 04CN-64-074 VNI VNIa
ERS2540971 04CN-63-012 VNI VNIa
ERS2541138 04CN-03-053 VNI VNIa
ERS2541154 04CN-03-021 VNI VNIa
ERS542410 15277_3#5 VNI VNIa
ERS542411 15277_3#6 VNI VNIa
ERS542414 15277_3#7 VNI VNIa
ERS542415 15277_3#8 VNI VNIa
ERS542403 15277_3#1 VNI VNIa
ERS1142807 20427_3#21 VNI VNIa
ERS542456 15277_3#18 VNI VNIa
ERS2541028 04CN-64-150 VNI VNIa
ERS2541329 04CN-65-016 VNI VNIa
ERS2540924 04CN-65-040 VNI VNIa
ERS2541018 04CN-64-128 VNI VNIa
ERS2540996 04CN-65-166 VNI VNIa
ERS1142758 20427_2#61 VNI VNIa
ERS542397 14936_1#6 VNI VNIa
SRS404479 SRR797807 VNI VNIb

The CNS_1465 strain was not available for download and the rest had bad quality alignments.

Root Ashton tree at the middle of the branch leading to VNIa

Code
VNIa_root <- getMRCA(ashton_tree_unrooted, c("AD3-95a","Tu259-1"))
edge_length <- subset(ashton_tree_unrooted$edge.length, 
    ashton_tree_unrooted$edge[,2] == VNIa_root)
ashton_tree <- reroot(ashton_tree_unrooted, VNIa_root, edge_length/2)

Write rooted Ashton tree

Code
write.tree(ashton_tree, file = ashton_tree_out_path)
Code
pu <- ggtree(ashton_tree_unrooted, layout = "circular", size = 0.1) +
            geom_tippoint(aes(color = sublineage[match(label, rownames(sublineage)), "vni_subdivision"]),
                            size = 2)+
            # scale_color_manual(values = sublineage_colors, 
            #                     name="VNI Sublineage", 
            #                     na.translate = FALSE,
            #                     limits = levels(sublineage$vni_subdivision))+
            labs(title = "Unrooted")+
            theme(legend.position = "none",
                plot.title = element_text(hjust = 0.5))

p <- ggtree(ashton_tree, layout = "circular", size =0.1)+
            geom_tippoint(aes(color = sublineage[match(label, rownames(sublineage)), "vni_subdivision"]),
                            size = 2)+
            # scale_color_manual(values = sublineage_colors, 
            #                     name="VNI Sublineage", 
            #                     na.translate = FALSE,
            #                     limits = levels(sublineage$vni_subdivision))+
        labs(title = "Rooted")+
        theme(legend.position = "right",
                plot.title = element_text(hjust = 0.5),
                legend.direction = "vertical",
                legend.title = element_text(size=11),
                legend.text=element_text(size=9),
                legend.key.size = unit(0.5, "cm"))

pu | p 

Code
#ggsave(ashton_tree_rooted_plot_pdf, m5, height = 20, width = 18, units = "in", dpi = 900, limitsize = FALSE)
Code
m <- ggtree(ashton_tree, layout = "circular", size = 0.1) +  
            geom_tiplab(aes(label = label), size = 0.5, align =TRUE, 
                    linetype = "dashed", linesize = .03)+
            geom_treescale(x=-0.03, y=0, width=0.01, fontsize =2, offset=30)

m1 <- gheatmap(m, dataset, width=.05, colnames=FALSE, offset=.025) +
            scale_fill_manual(values = dataset_colors, name="Dataset", na.translate = FALSE)+
            guides(fill = guide_legend(order = 1))+
            new_scale_fill()

m2 <- gheatmap(m1, sublineage, width=.05, colnames=FALSE, offset=.035) +
            scale_fill_manual(values = sublineage_colors, name="VNI Sublineage", na.translate = FALSE,
                                limits = levels(sublineage$vni_subdivision))+
            guides(fill = guide_legend(order = 2))+
            new_scale_fill()

m3 <- gheatmap(m2, source, width=.05, colnames=FALSE, offset=0.045) +
        scale_fill_manual(values = source_colors, name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 3))+
        labs(title = "Ashton tree")+
        new_scale_fill()

m4 <- gheatmap(m3, country, width =.05, colnames=FALSE,offset=0.055)+
    scale_fill_manual(values=country_colors, 
                      name="Country", na.translate=FALSE,
                      limits = levels(country$country_of_origin))+
    guides(fill = guide_legend(order =4))+
        theme(legend.position = "right",
                legend.direction = "vertical",
                legend.title = element_text(size=9),
                legend.text=element_text(size=7),
                legend.key.size = unit(0.3, "cm"),
                plot.margin = margin(0, 0, 0, 0, "cm"))

m4
Code
ggsave(ashton_tree_out_plot, m4, height = 6.5, width = 7, units = "in", dpi = 600)
ggsave(ashton_tree_out_plot_pdf, m4, height = 6.5, width = 7, units = "in", dpi = 600)

Merge Desjardins and Ashton trees

Specify clades in Desjardins tree

Code
VNI <- c("Bt92", "Bt79")
VNI_node <- getMRCA(desj_tree, VNI)
VNII <- c("C2","C12")
VNII_node <- getMRCA(desj_tree, VNII)
VNB <- c("Bt7", "Bt34")
VNB_node <- getMRCA(desj_tree, VNB)

Get the ages of the nodes from the original Desjardins tree. This is to attempt to have a calibrated tree, but the resulting branchlengths are not accurate.

Code
edge_lengths <- node.depth.edgelength(desj_tree)
node_labels <- c(desj_tree$tip.label, desj_tree$node.label)
edge_length_mapping <- data.frame(
    node = node_labels, 
    edge_length = edge_lengths, 
    max_length = max(edge_lengths))
edge_length_mapping <- edge_length_mapping %>% 
                        mutate(age = max_length - edge_length) %>%
                        rownames_to_column("node_id")
clade_ages <- edge_length_mapping %>% 
                filter(node_id %in% c(VNI_node, VNII_node, VNB_node))
nodeages <- c("Bt92-Bt79" = clade_ages$age[clade_ages$node_id == VNI_node],
             "C2-C12" = clade_ages$age[clade_ages$node_id == VNII_node],
             "Bt7-Bt34" = clade_ages$age[clade_ages$node_id == VNB_node])
tip_ages <- edge_length_mapping %>% 
                filter(node %in% metadata$strain)
tipages <- tip_ages$age
names(tipages) <- tip_ages$node

Remove VNI clade from Desjardins tree to use it as backtree

Code
VNI_tips <- tips(desj_tree, VNI_node)
backtree <- drop.tip(desj_tree, VNI_tips)

Create the reference tables

Code
reference <- data.frame(bind=c("CNS_289-20427_2#4"),
                   reference=c("Bt7-Bt34"),
                   poly=c(FALSE))

Merge

Code
merged <- tree.merger(backbone = backtree,
                        data=reference,
                        source.tree = ashton_tree,
                        plot=FALSE,
                        node.ages = nodeages,
                        tip.ages = tipages)

Write merged tree to file

Code
write.tree(merged, file = merged_tree_out_path)

Get lineage and sublineage nodes for plot

Code
VNI_node <- getMRCA(merged, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(merged, c("C2","C12"))
VNBI_node <- getMRCA(merged, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(merged, c("MW-RSA3321","MW-RSA3179"))

nodes_lineages <- data.frame(
    lineage = c("VNI", "VNII", "VNBI", "VNBII"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node)
)
Code
VNIa4_node <- getMRCA(merged, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(merged, c("BMD852","14936_1#45"))
VNIa93_node <- getMRCA(merged, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(merged, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(merged, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(merged, c("04CN-65-073","Bt138"))
VNIb_node <- getMRCA(merged, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(merged, c("Bt20","Bt11"))

nodes_vnisublineages <- data.frame(
    sublineage = c(
                "VNIa-4", "VNIa-5", "VNIa-93",
                "VNIa-32", "VNIa-X", "VNIa-Y",
                "VNIb", "VNIc"),
    mrca = c(
            VNIa4_node, VNIa5_node, VNIa93_node,
            VNIa32_node, VNIaX_node, VNIaY_node,
            VNIb_node, VNIc_node))
Code
nodes_sublineages <- data.frame(
    sublineage = c("VNI", "VNII", "VNBI", "VNBII",
                "VNIa-4", "VNIa-5", "VNIa-93",
                "VNIa-32", "VNIa-X", "VNIa-Y",
                "VNIb", "VNIc"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node,
            VNIa4_node, VNIa5_node, VNIa93_node,
            VNIa32_node, VNIaX_node, VNIaY_node,
            VNIb_node, VNIc_node))

Colored branches tree

Group tree by sublineage

Code
merged <- groupClade(merged, setNames(nodes_sublineages$mrca, nodes_sublineages$sublineage))
Code
m4 <- ggtree(merged, aes(color = group), 
        layout = "circular", 
        branch.length = "none",
        size = 0.2) %<+%  metadata +
    scale_color_manual(
        name = "Sublineage",
        values = sublineage_colors)+
    guides(color = guide_legend(override.aes = list(linewidth = 3)))+
    new_scale_color()+
    geom_tiplab( size = 0.5, offset = 0.01)+
    guides(color = guide_legend(override.aes = list(size = 3)))+
    new_scale_color()+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_tippoint(aes(color = country_of_origin, shape = mating_type), size = 0.1)+
    scale_color_manual(name = "Country", values = country_colors)+
    scale_shape_manual(name = "Mating type", values = c(15,17,16))+
    guides(color = guide_legend(override.aes = list(size = 2), ncol = 2),
            shape = guide_legend(override.aes = list(linewidth = 3)))+
    theme(legend.position = "bottom",
            legend.direction = "vertical")
m4

Base

Code
m <- ggtree(merged, 
        ladderize = TRUE,
        layout = "circular", 
        branch.length = "none",
        size = 0.1) %<+%  metadata +
    geom_tiplab(color = "black", size = 0.5, offset = 0.01)+
    geom_text(aes(label = nodes_lineages$lineage[match(node, nodes_lineages$mrca)]), 
                        size = 2, , fontface = "bold",
                        hjust = 1.25, vjust = -0.5)+
    geom_hilight(data=nodes_vnisublineages, 
        aes(node=mrca, fill=sublineage), alpha = 0.8)+
        scale_fill_manual(name = "Sublineage", values = sublineage_shading)+
    guides(fill = FALSE)+
    new_scale_fill()+
    geom_tree(size = 0.1)+
    geom_tippoint(aes(shape = mating_type, color = source),
                size = 0.3)+
    scale_color_manual(name = "Source", values = source_colors, na.value = "gray90")+
    scale_shape_manual(name = "MAT", values = mat_shapes, na.value = 18)+
    guides(shape = guide_legend(order = 1), #override.aes = list(linewidth = 0.01),
            color = guide_legend(override.aes = list(size = 5), order = 2))

mc <- m + geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            angle = "auto", offset = 3)
mc

Country

Code
m1 <- gheatmap(m, country, width =.05, colnames=FALSE,offset=3)+
    scale_fill_manual(values=country_colors, 
                      name="Country", na.translate=FALSE,
                      limits = levels(country$country_of_origin))+
    guides(fill = guide_legend(order = 3))+
    new_scale_fill()+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            angle = "auto", offset = 6)

m1

Dataset

Code
m2 <- gheatmap(m, dataset, width=.05, colnames=FALSE, offset=3) +
        scale_fill_manual(values = dataset_colors, 
            name="Dataset", na.translate = FALSE)+
        guides(fill = guide_legend(order = 3))+
        new_scale_fill()

m3 <- gheatmap(m2, ploidy, width=.05, colnames=FALSE, offset=5) +
        scale_fill_brewer(name = "Ploidy", palette = "Dark2")+
        guides(fill = guide_legend(order = 4))+
        new_scale_fill()
        
m4 <- gheatmap(m3, quality, width=.05, colnames=FALSE, offset=7) +
        scale_fill_brewer(name = "Filtered", palette = "Set1") +
        guides(fill = guide_legend(order = 5))+
        new_scale_fill()+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            angle = "auto", offset = 9)
m4

Mating type

Code
m3 <- gheatmap(m, mating_type, width=.05, colnames=FALSE, offset=3) +
        scale_fill_manual(values = mat_colors, 
            name="Mating type", na.translate = FALSE)+
        guides(fill = guide_legend(order = 3))+
        new_scale_fill()+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            angle = "auto", offset = 6)
m3

Source

Code
m3 <- gheatmap(m, source, width=.05, colnames=FALSE, offset=3) +
        scale_fill_manual(values = source_colors, 
            name="Source", na.translate = FALSE)+
        guides(fill = guide_legend(order = 3))+
        new_scale_fill()+
    geom_cladelab(data = nodes_vnisublineages, 
            mapping = aes(node = mrca, label = sublineage),
            align = TRUE, face = "bold",
            angle = "auto", offset = 6)
m3

Code
ggsave(merged_tree_plot, m4, height = 10, width = 10, units = "in", dpi = 600)
ggsave(merged_tree_plot_pdf, m1, height = 10, width = 10, units = "in", dpi = 600)

Plot minimal version of the tree

Get two samples of each non-VNI lineage, VNI sublineage, and all VNIa-outlier

Code
VNIa_outlier <- metadata %>%
    filter(vni_subdivision == "VNIa-outlier")

representatives <- c("Tu241-1","UI_31647-2","C2","C12","Tu229-1",
                    "Ftc267-2","MW-RSA3321","MW-RSA3179","04CN-30-008",
                    "UI_31647-2","BMD852","14936_1#45","04CN-65-080",
                    "04CN-65-002","BMD942","BMD2801","Bt48",
                    "04CN-63-007","04CN-65-073","Bt138",
                    "04CN-65-096","MW-RSA722","Bt20","Bt11")
tips <- c(VNIa_outlier$strain, representatives)

Make a small version of the merged tree only with the tips in tips

Code
small_tree <- drop.tip(merged, setdiff(merged$tip.label, tips))
Code
VNI_node <- getMRCA(small_tree, c("Tu241-1","UI_31647-2"))
VNII_node <- getMRCA(small_tree, c("C2","C12"))
VNBI_node <- getMRCA(small_tree, c("Tu229-1","Ftc267-2"))
VNBII_node <- getMRCA(small_tree, c("MW-RSA3321","MW-RSA3179"))
VNIa4_node <- getMRCA(small_tree, c("04CN-30-008","UI_31647-2"))
VNIa5_node <- getMRCA(small_tree, c("BMD852","14936_1#45"))
VNIa93_node <- getMRCA(small_tree, c("04CN-65-080","04CN-65-002"))
VNIa32_node <- getMRCA(small_tree, c("BMD942","BMD2801"))
VNIaX_node <- getMRCA(small_tree, c("Bt48","04CN-63-007"))
VNIaY_node <- getMRCA(small_tree, c("04CN-65-073","Bt138"))
VNIb_node <- getMRCA(small_tree, c("04CN-65-096","MW-RSA722"))
VNIc_node <- getMRCA(small_tree, c("Bt20","Bt11"))

nodes_sublineages <- data.frame(
    sublineage = c("VNI", "VNII", "VNBI", "VNBII",
                "VNIa-4", "VNIa-5", "VNIa-93",
                "VNIa-32", "VNIa-X", "VNIa-Y",
                "VNIb", "VNIc"),
    mrca = c(VNI_node, VNII_node, VNBI_node, VNBII_node,
            VNIa4_node, VNIa5_node, VNIa93_node,
            VNIa32_node, VNIaX_node, VNIaY_node,
            VNIb_node, VNIc_node))

Code
ggsave(merged_tree_small_plot, p, height = 6, width = 7, units = "in", dpi = 600)